"""
Title: DoctorSim-v2.jl
Author: WB Macleod
Created: 2019-08-28
Last Edit: 2019-10-13
Description: Simulations for Currie-Macleod paper on Doctor Decision Making
Functions for use in the simulations.

Copyright 2019 by W. B. MacLeod, wbmacleod@gmail.com
This file is part of "CM-DoctorSim".

    "CM-DoctorSim" is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    "CM-DoctorSim"  is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with "CM-DoctorSim"  If not, see <https://www.gnu.org/licenses/>.

"""

# Packages
using DataFrames
using Distributions
using CSV
using LinearAlgebra
using Random
using Primes
using Dates
#=========================================================#
#  Helper Functions                                       #
#=========================================================#

"""
    data-in(DD = DrugData.csv, dir = "./")

Reads in the means and standard deviation of the drugs data from  dir.
Assumes that the standard deviation of the placebo effect is a
measure of the variance of untreated patient.
It is assumed that the covariance of drugs is completely determined by this
effect.  Drug 0 is placebo.
"""
function  DataIn(pcorr = 0.9, DData = "DrugEffects.csv", sig2F = "sigma.csv",  dir = "./")
    DE = CSV.read(dir*DData, header = true);
    mu = DE[:,:ldEffect];  #mean drug effects
    m = length(mu);
    ## var of treatment effect and remove fixed effect variaance
    var = (DE[:,:lsdEffect]).^2
    fevar = pcorr * minimum(var)
    var = var .- fevar
    sig0 = Diagonal(var) + zeros(m,m); # variance of effect and change to Array type.
    mu[1] = 0 #Set drug 1 to zero since sugar pill not prescribed.
    # All patients will experience a placebo effect from treatment.
    # This effect will be correlated across treatment with a size
    # equal to the variance of the placebo effect. Add this to Variance
    # on off diagonals.  Add a bit of noise to correlation to ensure not singular
    sig0 = sig0 .+ fevar #add placebo correlation
    return reshape(mu,m,1),  sig0
end

"""
function  DataInPoly(dcom = 0.6, err = 0.1, DData = "DrugEffects.csv", dir = "./")

Reads in the means and standard deviation of the drugs data from  dir.
Assumes that the standard deviation of the placebo effect is a
measure of the variance of untreated patient.
It is assumed that the covariance of drugs is completely determined by this
effect.  Drug 1 is placebo.
In practice placebo pills are not given so we normalize drug 1 to zero.
It is assume that variance of drug 1 represents underlying condition and
hence present for all treatments.
In this case we create mu0 and Sig0 to provide means and covariancs for all drug choices:
Effect of ij is dcom *(e_i+e_j), with corresponding covariances
Add variance placebo * err to polypharmacy to make Sig0 non-singular
"""
# Try implementing with sets, then order does not matter
function  DataInPoly(dcom = 0.6, err = 0.1, pcorr = 0.0, DData = "DrugEffects.csv", dir = "./")
    DE = CSV.read(dir*DData, header = true);
    mu = DE[:,:ldEffect];  #mean drug effects
    sig = DE[:,:lsdEffect].^2 #variance of effect
    ## Define variance of fixed effect (mean zero by construction)
    fevar = pcorr * minimum(sig)
    sig = sig .- fevar
    ## Add noise term of polypharmacy
    errv = err * sig[1] 
    m = length(mu)
    m1 = Int(factorial(m)/(2*factorial(m-2)) + 1) #number of unique 2 drug combinations
    ## Create a list of sets, index, where index[i]  is a 1 or two element
    ## set defining drug combination.
    index = Set[]
    # Do single drugs first
    for i in 1:m
        S = Set([i])
        push!(index, S)
        ## Add the poly-pharmacy combinations
    end
    for i in 2:(m-1)
        for j in (i+1):m
            S = Set([i, j])
            push!(index, S)
        end
    end
    ## These will be the mean and variance of drug effects
    mu0 = zeros(m1,1) #Thisis a m1 x 1 vector for the deriving normal distribution.
    sig0 = zeros(m1,m1)
         ## Work out mean effect
        ## one drug cases - leave out k=1 since this is added later.
    for k in 1:m
            mu0[k,1] = mu[k,1]; #first drug is placebo effect (assumed patient outcome without drugs)
        ## Work out the 2 drug mean effects
    end
    ## k > m, two or more drugs prescribed - effect is dcom * sum
    for k in (m+1):m1
        mu0[k,1] = dcom*(sum(mu[collect(index[k])]))
    end
     ## 
    ## Now the variance terms placebo added to all, so it do at end
    ##
    for k in 1:m1
        for l in 1:m1
            nS = length(index[k]) + length(index[l]) #covers the 3 cases, 2, 3 and 4
            S = intersect(index[k], index[l])
            if nS == 2
                for i in S
                    sig0[k,l] = sig[i] + sig0[k,l]
                end
            elseif nS == 3
                for i in S
                    sig0[k,l] = dcom * sig[i] + sig0[k,l]
                end
            elseif nS == 4
                for i in S
                    sig0[k,l] = dcom^2 * sig[i] + sig0[k,l]
                end
            end
        end
    end
    #Since docs do not give sugar pills set mean of drug 1 to zero.
    mu0[1,1] = 0
    # Add noise to make polypharmacy effects idiosyncratic, and hence sig0 non-singular
    for i in (m+1):m1
        sig0[i,i] = sig0[i,i] + errv
    end
    ## Add variance from Placebo effect
    sig0 = sig0 .+ fevar
    ## Add a bit of noise to placebo variance as well so correlation is not perfect
    # sig0[1,1] = sig0[1,1] + errv
    return mu0, sig0, index
end



"""
    mu1, rho1 = LL(y, w, rhoe, mu0, rho0)
Linear Learning routine.
 Suppose x ~N(mu0, rho0^-1)
y = w * x + epsilon, Precision epsilon = rhoe, mean zero
Computes updated mean and variance.
Formula from Kay on signal processing.
"""
function LL(y::Float64, w, rhoe::Float64, mu0, Rho0)
    Rho1 = Rho0 + w' * rhoe * w; #could use SymWoodbury here.
    mu1 = mu0 + (inv(Rho1) * w' * (rhoe .* (y .- w * mu0)));
    return mu1, Rho1;
end
"""
struct Patient
    id::Int #id number unique
    g::Array{Float64, 2} #True treatment Effects
    d::Int64   #Optimal drug number

Define Patient Type i that will be treated by doctor j. We can create a dictionary of
## patients who will be treated over time, and then the results of treatment can be added
to a data table for analysis.  Optimal drug is a vector (to all for polypharmacy)
"""
struct Patient
    id::Int #id number unique
    g::Array{Float64, 2} #True treatment Effects
    d::Int   #Optimal drug number
    nsingle::Int #number of single drugs
    dsingle::Int #Optimal single drug. If d != dsingle then polypharmacy is optimal.
end
"""
Create list of patients with random treatment effects.
"""
function P_list(NP, mu0, Sig0, nsingle = length(mu0))
    P = Patient[]
    m = length(mu0)  # number of drugs
    for i in 1:NP
        eff = reshape(rand(MvNormal(mu0[:,1],Sig0)), m,1);
        dm = findmax(eff)[2][1];
        dsingle = findmax(eff[1:nsingle,:])[2][1] #single drug
        push!(P, Patient(i, eff, dm, nsingle, dsingle))
              #findmax returns coordinates in matrix - take column index
    end
    return P
end

"""
mutable struct PDoc
    i::Int #patient id
    t::Int #time or date
    d::Int #Drug prescribed last period.
    y::Float64 Diagnosis.
    m::Array{Float64, 2} # Doctors expected mean effect AFTER interview at data t
    r::Array{Float64, 2} #precision of estimate AFTER interview.
    U::Float64  Value that doc used to choose d
This is the record of patient after seeing the doctor.
Each period doctor updates beliefs given observed condition.
"""
mutable struct PDoc
    id::Int 
    t::Int 
    d::Int
    y::Float64
    m::Array{Float64, 2}
    r::Array{Float64, 2}
    U::Float64
end
"""
     DP_list(P::Array{Patient,1}, mu0, Sig0)
    t = 0; # time set to zero and increased when patient arrives
    d = 1; # 1 is placebo - no drug
    y = 0; # Last diagnosis

This is the function creating the first list of patient data
"""
function PD_list(P::Array{Patient,1}, mu0, Sig0)
    np = length(P);
    PD = PDoc[];
    t = 0;
    d = 1;
    y = 0;
    rho = inv(Sig0)
    U = 0;
    for i in 1:np
        id = P[i].id;
        push!(PD, PDoc(id,t,d,y,mu0,rho,U))
    end
    return PD;
end



"""
    struct Doctor id - id number, Dtype - type of doctor, sk - skill level, pr - reservation pr
Define doctor as a function of their two characteristics and id.
Create a list of distinct doctors
"""
struct Doctor
    id::Int #id number
    Dstyle::String
    Dqual::String
    sk::Float64 #precision of diagnostic ability
    pr::Float64 #reservation probability - should be in [0,1]
end

function D_list(d_qual, d_style)
    D = Doctor[]
    j=0;
    for (k,v) in d_qual
        for (ks, vs) in d_style
            j = j + 1;
            Dstyle = ks;
            Dqual = k;
            push!(D, Doctor(j, Dstyle, Dqual, v, vs))
        end
    end
    return D;
end

"""
    function Treat!(Doctor, Patient, PDoc)
Requires Y() function that returns signal
Observes patient, updates beliefs, chooses drug, and records to patient and public data
"""
function Treat!(D::Doctor, P::Patient, PD::PDoc)
    if P.id != PD.id
        print("Patient record incorrect!\n");
        return
    end
    PD.t = PD.t + 1;
    m0 = PD.m;
    m = length(m0);
    r0 = PD.r;
    y, W = Y(D.sk, P.g, PD.d); #This function returns condition give drug and doctor skill.
    PD.y = y #record diagnosis
    # Update beliefs for doctor.
    PD.m, PD.r = LL(y, W, D.sk, m0, r0);
    #Compute new variance and Q value
    U = PD.m + quantile(Normal(),D.pr)*reshape(diag(inv(PD.r)), m,1)
    #optimal choice.
    V = findmax(U);
    PD.d = V[2][1];  #optimal drug
    PD.U = P.g[PD.d] - P.g[P.d] #Payoff at optimal drug
    return
end

#===================================================================
Next step is to work out decision rules.
Here we will be looking at two cases.
First with one drug, and then with polypharmacy
We poly pharmacy we just have to change the initial beliefs to
include two drugs and function Y.  I can a code at the begining
to signal this.

====================================================================#

"""
    function Y(sk = skill - precision, G = true condition, d = drug given)
    return y,W
This function returns the noisy signal to the doctor based upon drug.
y is the signal and W is 1 x m matrix representing the drug choose (1 in position d)
"""
function Y(sk, G, d)
    m = length(G);
    W = reshape(zeros(m), 1,m); #Create empty W matrix
    W[1,d] = 1; #choice
    W[1,1] = 1 # unobserved patient condition. Notice if d=1, then this is still 1.
    y = (W * G)[1] + randn()/sqrt(sk);  # sk = 1/sig^2 - adjust noise
    return y, W;
end

"""
    function Monthly!(D::Doctor, Plist::Patient[], PD::Doc[], df)
Computes treatment for a list of patients ones, computes entropy, average
Utility and fraction of correct calls  per doctor, and add to df
"""
function Monthly!(D::Doctor, Plist::Array{Patient, 1},
                  PD::Array{PDoc, 1}, df::DataFrame, Poly::Int = 0, ind = Float64[])
    ni = length(Plist)
    Nopt = 0; #number of optimal choices
    nd = length(Plist[1].g); #number of drug choices
    dj = zeros(nd); #count on choice distribution
    U = 0; #total utility
    PolyOpt = 0.0
    if Poly == 0
        for i in 1:ni
            global Gi = i
            dopt = Plist[i].d;
            Treat!(D, Plist[i], PD[i])
            d = PD[i].d #New choice
            Nopt = Nopt + (d == dopt)
            dj[d] = dj[d] + 1;
            U = U + PD[i].U;
        end
    else
        for i in 1:ni
            global Gi = i
            dopt = Plist[i].d;
            Treat!(D, Plist[i], PD[i])
            d = PD[i].d #New choice
            Nopt = Nopt + (d == dopt)
            PolyOpt = PolyOpt + (dopt != Plist[i].dsingle)
            dj[d] = dj[d] + 1;
            U = U + PD[i].U;
        end
        dj = ind * dj #convert drugs into single drug use.
    end
    println("Doctor Prescriptions in month: ", Gm, " Drugs: ", dj, "\n")
    en = entropy(dj./sum(dj), length(dj))
    push!(df, [Gr  Gm D.id D.Dstyle D.Dqual en U/ni Nopt/ni Poly PolyOpt/ni])
end

